In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
/Users/lettyuy/opt/anaconda3/lib/python3.9/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.25.0 is required for this version of SciPy (detected version 1.26.0
  warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
In [2]:
# Load the chart data from "Hot 100.csv" and convert both date columns to datetimes.
df = pd.read_csv("Hot 100.csv")
for date_column in ('chart_date', 'chart_debut'):
    df[date_column] = pd.to_datetime(df[date_column])
df.head()
Out[2]:
chart_position chart_date song performer song_id instance time_on_chart consecutive_weeks previous_week peak_position worst_position chart_debut chart_url
0 84 1990-05-05 "B" Girls Young And Restless "B" GirlsYoung And Restless 1.0 1 NaN NaN 84 84 1990-05-05 https://www.billboard.com/charts/hot-100/1990-...
1 78 1990-05-12 "B" Girls Young And Restless "B" GirlsYoung And Restless 1.0 2 1.0 84.0 78 84 1990-05-05 https://www.billboard.com/charts/hot-100/1990-...
2 68 1990-05-19 "B" Girls Young And Restless "B" GirlsYoung And Restless 1.0 3 2.0 78.0 68 84 1990-05-05 https://www.billboard.com/charts/hot-100/1990-...
3 60 1990-05-26 "B" Girls Young And Restless "B" GirlsYoung And Restless 1.0 4 3.0 68.0 60 84 1990-05-05 https://www.billboard.com/charts/hot-100/1990-...
4 58 1990-06-02 "B" Girls Young And Restless "B" GirlsYoung And Restless 1.0 5 4.0 60.0 58 84 1990-05-05 https://www.billboard.com/charts/hot-100/1990-...
In [3]:
# Calendar year of each chart row, taken from its chart date.
chart_dates = df['chart_date']
df['chart_year'] = chart_dates.dt.year
In [4]:
# Mean chart position of every (song, performer) pair across all of its chart rows,
# rounded to the nearest whole number.
avg_chart_positions = (
    df.groupby(['song', 'performer'])['chart_position']
    .mean()
    .round()
    .astype(int)
    .reset_index()
    .rename(columns={'chart_position': 'avg_chart_position'})
)

# Attach the average to every chart row. Any 'avg_chart_position' column left over from
# a previous run of this cell is dropped first: without that, re-running the cell would
# make the merge emit 'avg_chart_position_x' / 'avg_chart_position_y' and break every
# later cell that expects the plain column name.
df = pd.merge(
    df.drop(columns='avg_chart_position', errors='ignore'),
    avg_chart_positions,
    on=['song', 'performer'],
    how='left',
)
In [5]:
# Keep only the chart rows where the song sits at position 1.
df_at_1 = df.loc[df['chart_position'].eq(1)]
In [6]:
# Preview the first five #1 rows.
df_at_1.head(n=5)
Out[6]:
chart_position chart_date song performer song_id instance time_on_chart consecutive_weeks previous_week peak_position worst_position chart_debut chart_url chart_year avg_chart_position
455 1 1990-09-29 (Can't Live Without Your) Love And Affection Nelson (Can't Live Without Your) Love And AffectionNe... 1.0 13 12.0 4.0 1 93 1990-07-07 https://www.billboard.com/charts/hot-100/1990-... 1990 40
610 1 1991-07-27 (Everything I Do) I Do It For You Bryan Adams (Everything I Do) I Do It For YouBryan Adams 1.0 5 4.0 4.0 1 53 1991-06-29 https://www.billboard.com/charts/hot-100/1991-... 1991 22
611 1 1991-08-03 (Everything I Do) I Do It For You Bryan Adams (Everything I Do) I Do It For YouBryan Adams 1.0 6 5.0 1.0 1 53 1991-06-29 https://www.billboard.com/charts/hot-100/1991-... 1991 22
612 1 1991-08-10 (Everything I Do) I Do It For You Bryan Adams (Everything I Do) I Do It For YouBryan Adams 1.0 7 6.0 1.0 1 53 1991-06-29 https://www.billboard.com/charts/hot-100/1991-... 1991 22
613 1 1991-08-17 (Everything I Do) I Do It For You Bryan Adams (Everything I Do) I Do It For YouBryan Adams 1.0 8 7.0 1.0 1 53 1991-06-29 https://www.billboard.com/charts/hot-100/1991-... 1991 22
In [7]:
# One row per (performer, song) pair that appears at #1; 'count' is how many
# chart rows that pair has at position 1.
number_one_rows_per_song = df_at_1.groupby(['performer', 'song']).size()
unique_songs_at_1 = number_one_rows_per_song.reset_index(name='count')

# Number of distinct #1 songs credited to each performer string.
performer_hits = unique_songs_at_1.groupby('performer').size()
performer_hits
Out[7]:
performer
'N Sync                         1
24kGoldn Featuring iann dior    1
2Pac Featuring K-Ci And JoJo    1
50 Cent                         1
50 Cent Featuring Nate Dogg     1
                               ..
Zager & Evans                   1
Zayn                            1
a-ha                            1
fun. Featuring Janelle Monae    1
matchbox twenty                 1
Length: 759, dtype: int64
In [8]:
# Performers with exactly one distinct #1 song.
has_single_number_one = performer_hits.eq(1)
one_hit_artists_list = performer_hits.loc[has_single_number_one].index.tolist()
In [9]:
# #1 chart rows by performers with exactly one #1 song, reduced to the first
# such row for each (song, performer) pair.
is_number_one = df['chart_position'].eq(1)
is_one_hit_performer = df['performer'].isin(one_hit_artists_list)
df_one_hit_wonders = (
    df.loc[is_number_one & is_one_hit_performer]
    .drop_duplicates(subset=['song', 'performer'])
)
df_one_hit_wonders
Out[9]:
chart_position chart_date song performer song_id instance time_on_chart consecutive_weeks previous_week peak_position worst_position chart_debut chart_url chart_year avg_chart_position
455 1 1990-09-29 (Can't Live Without Your) Love And Affection Nelson (Can't Live Without Your) Love And AffectionNe... 1.0 13 12.0 4.0 1 93 1990-07-07 https://www.billboard.com/charts/hot-100/1990-... 1990 40
953 1 1987-05-02 (I Just) Died In Your Arms Cutting Crew (I Just) Died In Your ArmsCutting Crew 1.0 9 8.0 5.0 1 80 1987-03-07 https://www.billboard.com/charts/hot-100/1987-... 1987 31
1172 1 1987-11-28 (I've Had) The Time Of My Life Bill Medley & Jennifer Warnes (I've Had) The Time Of My LifeBill Medley & Je... 1.0 10 9.0 2.0 1 73 1987-09-26 https://www.billboard.com/charts/hot-100/1987-... 1987 30
1361 1 1980-12-27 (Just Like) Starting Over John Lennon (Just Like) Starting OverJohn Lennon 1.0 9 8.0 3.0 1 38 1980-11-01 https://www.billboard.com/charts/hot-100/1980-... 1980 17
1803 1 1968-03-16 (Sittin' On) The Dock Of The Bay Otis Redding (Sittin' On) The Dock Of The BayOtis Redding 1.0 8 7.0 3.0 1 67 1968-01-27 https://www.billboard.com/charts/hot-100/1968-... 1968 15
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
335000 1 2006-03-11 You're Beautiful James Blunt You're BeautifulJames Blunt 1.0 18 17.0 2.0 1 88 2005-11-12 https://www.billboard.com/charts/hot-100/2006-... 2006 25
335508 1 1975-02-15 You're No Good Linda Ronstadt You're No GoodLinda Ronstadt 1.0 11 10.0 2.0 1 86 1974-12-07 https://www.billboard.com/charts/hot-100/1975-... 1975 34
335690 1 1973-01-06 You're So Vain Carly Simon You're So VainCarly Simon 1.0 6 5.0 4.0 1 99 1972-12-02 https://www.billboard.com/charts/hot-100/1973-... 1973 19
335871 1 1978-06-10 You're The One That I Want John Travolta & Olivia Newton-John You're The One That I WantJohn Travolta & Oliv... 1.0 11 10.0 2.0 1 65 1978-04-01 https://www.billboard.com/charts/hot-100/1978-... 1978 28
336118 1 1971-07-31 You've Got A Friend James Taylor You've Got A FriendJames Taylor 1.0 9 8.0 3.0 1 80 1971-06-05 https://www.billboard.com/charts/hot-100/1971-... 1971 17

588 rows × 15 columns

In [10]:
# Performers with three or more distinct #1 songs.
artists_with_staying_power_list = performer_hits[performer_hits >= 3].index.tolist()

# Both conditions must be parenthesised: `&` binds tighter than `==`, so the
# unparenthesised form `df['chart_position'] == 1 & mask` is evaluated as
# `df['chart_position'] == (1 & mask)` and only selects the intended rows by accident.
df_artists_with_staying_power = df[
    (df['chart_position'] == 1)
    & df['performer'].isin(artists_with_staying_power_list)
]

# Keep the first #1 row for each (song, performer) pair.
df_artists_with_staying_power = df_artists_with_staying_power.drop_duplicates(subset=['song', 'performer'])
df_artists_with_staying_power
Out[10]:
chart_position chart_date song performer song_id instance time_on_chart consecutive_weeks previous_week peak_position worst_position chart_debut chart_url chart_year avg_chart_position
610 1 1991-07-27 (Everything I Do) I Do It For You Bryan Adams (Everything I Do) I Do It For YouBryan Adams 1.0 5 4.0 4.0 1 53 1991-06-29 https://www.billboard.com/charts/hot-100/1991-... 1991 22
851 1 1965-07-10 (I Can't Get No) Satisfaction The Rolling Stones (I Can't Get No) SatisfactionThe Rolling Stones 1.0 5 4.0 2.0 1 67 1965-06-12 https://www.billboard.com/charts/hot-100/1965-... 1965 14
1484 1 1978-03-04 (Love Is) Thicker Than Water Andy Gibb (Love Is) Thicker Than WaterAndy Gibb 1.0 18 17.0 2.0 1 89 1977-11-05 https://www.billboard.com/charts/hot-100/1978-... 1978 28
1710 1 1976-09-11 (Shake, Shake, Shake) Shake Your Booty KC And The Sunshine Band (Shake, Shake, Shake) Shake Your BootyKC And T... 1.0 10 9.0 5.0 1 79 1976-07-10 https://www.billboard.com/charts/hot-100/1976-... 1976 25
2084 1 1970-07-25 (They Long To Be) Close To You Carpenters (They Long To Be) Close To YouCarpenters 1.0 6 5.0 3.0 1 56 1970-06-20 https://www.billboard.com/charts/hot-100/1970-... 1970 15
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
331740 1 1986-11-29 You Give Love A Bad Name Bon Jovi You Give Love A Bad NameBon Jovi 1.0 13 12.0 4.0 1 93 1986-09-06 https://www.billboard.com/charts/hot-100/1986-... 1986 36
332177 1 1974-11-02 You Haven't Done Nothin Stevie Wonder You Haven't Done NothinStevie Wonder 1.0 14 13.0 2.0 1 93 1974-08-03 https://www.billboard.com/charts/hot-100/1974-... 1974 27
332218 1 1966-11-19 You Keep Me Hangin' On The Supremes You Keep Me Hangin' OnThe Supremes 1.0 4 3.0 7.0 1 68 1966-10-29 https://www.billboard.com/charts/hot-100/1966-... 1966 18
333687 1 1976-09-04 You Should Be Dancing Bee Gees You Should Be DancingBee Gees 1.0 10 9.0 2.0 1 67 1976-07-03 https://www.billboard.com/charts/hot-100/1976-... 1976 30
335164 1 1991-04-20 You're In Love Wilson Phillips You're In LoveWilson Phillips 1.0 11 10.0 2.0 1 64 1991-02-09 https://www.billboard.com/charts/hot-100/1991-... 1991 29

381 rows × 15 columns

In [11]:
def get_top_10_per_year(group: pd.DataFrame) -> pd.DataFrame:
    """Return the (up to) ten rows of ``group`` with the largest 'consecutive_weeks'.

    Intended as a ``groupby(...).apply`` callback; rows come back ordered from the
    largest 'consecutive_weeks' value downwards.
    """
    top_rows = group.nlargest(n=10, columns='consecutive_weeks')
    return top_rows
In [12]:
# Tag each frame with the cohort it represents. Both frames are derived from
# boolean-filtered slices of `df`, so writing a column into them in place can raise
# SettingWithCopyWarning; `.assign` returns a fresh frame instead and keeps the
# cell safe to re-run.
df_one_hit_wonders = df_one_hit_wonders.assign(Source='One Hit Wonders')
df_artists_with_staying_power = df_artists_with_staying_power.assign(Source='Artists with Staying Power')
In [13]:
# For each cohort, keep the per-year top 10 rows (by 'consecutive_weeks') and
# flatten the result back to a plain 0..n-1 index.
top_10_one_hit_wonders_yearly, top_10_staying_power_yearly = [
    cohort_frame.groupby('chart_year').apply(get_top_10_per_year).reset_index(drop=True)
    for cohort_frame in (df_one_hit_wonders, df_artists_with_staying_power)
]
In [14]:
# Display the per-year top-10 rows selected for the one-hit-wonder cohort.
top_10_one_hit_wonders_yearly
Out[14]:
chart_position chart_date song performer song_id instance time_on_chart consecutive_weeks previous_week peak_position worst_position chart_debut chart_url chart_year avg_chart_position Source
0 1 1958-11-29 To Know Him, Is To Love Him The Teddy Bears To Know Him, Is To Love HimThe Teddy Bears 1.0 11 10.0 3.0 1 88 1958-09-20 https://www.billboard.com/charts/hot-100/1958-... 1958 24 One Hit Wonders
1 1 1958-11-08 It's Only Make Believe Conway Twitty It's Only Make BelieveConway Twitty 1.0 9 8.0 2.0 1 65 1958-09-13 https://www.billboard.com/charts/hot-100/1958-... 1958 22 One Hit Wonders
2 1 1958-11-15 Tom Dooley The Kingston Trio Tom DooleyThe Kingston Trio 1.0 8 7.0 2.0 1 83 1958-09-27 https://www.billboard.com/charts/hot-100/1958-... 1958 19 One Hit Wonders
3 1 1958-09-27 It's All In The Game Tommy Edwards It's All In The GameTommy Edwards 1.0 7 6.0 3.0 1 96 1958-08-16 https://www.billboard.com/charts/hot-100/1958-... 1958 19 One Hit Wonders
4 1 1958-08-23 Little Star The Elegants Little StarThe Elegants 1.0 4 3.0 2.0 1 18 1958-08-02 https://www.billboard.com/charts/hot-100/1958-... 1958 18 One Hit Wonders
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
518 1 2022-08-27 Super Freaky Girl Nicki Minaj Super Freaky GirlNicki Minaj 1.0 1 NaN NaN 1 1 2022-08-27 https://www.billboard.com/charts/hot-100/2022-... 2022 22 One Hit Wonders
519 1 2022-05-14 Wait For U Future Featuring Drake & Tems Wait For UFuture Featuring Drake & Tems 1.0 1 NaN NaN 1 1 2022-05-14 https://www.billboard.com/charts/hot-100/2022-... 2022 17 One Hit Wonders
520 1 2023-04-29 Kill Bill SZA Kill BillSZA 2.0 20 16.0 4.0 1 11 2022-12-24 https://www.billboard.com/charts/hot-100/2023-... 2023 3 One Hit Wonders
521 1 2023-03-18 Last Night Morgan Wallen Last NightMorgan Wallen 1.0 6 5.0 5.0 1 27 2023-02-11 https://www.billboard.com/charts/hot-100/2023-... 2023 4 One Hit Wonders
522 1 2023-04-08 Like Crazy Jimin Like CrazyJimin 1.0 1 NaN NaN 1 1 2023-04-08 https://www.billboard.com/charts/hot-100/2023-... 2023 56 One Hit Wonders

523 rows × 16 columns

In [15]:
# Stack both cohorts' yearly top-10 tables into one frame (row indices are kept as-is).
yearly_cohort_frames = [top_10_one_hit_wonders_yearly, top_10_staying_power_yearly]
top_10_combined_yearly = pd.concat(yearly_cohort_frames)
In [16]:
# Give the plotted columns human-readable names. Rebinding the result (rather than
# mutating with inplace=True) keeps the step explicit and chain-friendly.
top_10_combined_yearly = top_10_combined_yearly.rename(
    columns={
        'time_on_chart': 'Time on Chart',
        'avg_chart_position': 'Average Chart Position',
        'song': 'Song',
    }
)
In [17]:
# Build the frame that feeds the animated scatter: every (year, cohort) combination
# must be present, so combinations with no real rows get a single NaN placeholder row.
#
# The years are sorted explicitly. `unique()` returns values in order of first
# appearance, so a year that only occurs in the second concatenated cohort would
# otherwise end up after all the others and put the animation frames out of
# chronological order.
all_years = np.sort(top_10_combined_yearly['chart_year'].unique())
all_sources = ["One Hit Wonders", "Artists with Staying Power"]

expanded_data = []

for year in all_years:
    # Filter by year once, then split by cohort.
    rows_for_year = top_10_combined_yearly[top_10_combined_yearly['chart_year'] == year]
    for Source in all_sources:
        subset = rows_for_year[rows_for_year['Source'] == Source]
        if subset.empty:
            # No song for this cohort in this year: add a row with NaN coordinates
            # and a unique synthetic performer name.
            expanded_data.append({
                'chart_year': year,
                'Source': Source,
                'Average Chart Position': np.nan,
                'Time on Chart': np.nan,
                'performer': f'Placeholder {Source} {year}'
            })
        else:
            expanded_data.extend(subset.to_dict('records'))

expanded_df = pd.DataFrame(expanded_data)
In [18]:
# Axis extents are taken from the real (non-placeholder) rows and computed once.
x_min = top_10_combined_yearly['Average Chart Position'].min()
x_max = top_10_combined_yearly['Average Chart Position'].max()
y_max = top_10_combined_yearly['Time on Chart'].max()

# Animated scatter: one frame per chart year, one marker per performer, coloured by cohort.
# The x range is given as [max, min] so the axis is reversed and better (numerically
# lower) average positions appear on the right.
# `size_max` was dropped: it only has an effect when a `size` column is supplied.
fig = px.scatter(
    expanded_df,
    x="Average Chart Position",
    y="Time on Chart",
    animation_frame="chart_year",
    animation_group="performer",
    hover_name="performer",
    hover_data={"Song": True, "Source": False, "chart_year": False},
    color="Source",
    range_x=[x_max, x_min],
    range_y=[0, y_max],
)

# Quadrant split points on both axes (hand-picked thresholds).
x_mid = 30
y_mid = 30

# Dashed guide lines dividing the plot into four quadrants.
quadrant_line_style = dict(color="Black", dash="dash", width=0.5)
fig.add_shape(type="line", x0=x_mid, x1=x_mid, y0=0, y1=y_max, line=quadrant_line_style)
fig.add_shape(type="line", x0=x_max, x1=x_min, y0=y_mid, y1=y_mid, line=quadrant_line_style)

# One caption per quadrant, each placed half a threshold away from the split point.
quadrant_labels = [
    ("Lower Rank, long duration", x_mid + (x_mid / 2), y_mid + (y_mid / 2)),
    ("High rank, long duration", x_mid - (x_mid / 2), y_mid + (y_mid / 2)),
    ("High rank, short duration", x_mid - (x_mid / 2), y_mid - (y_mid / 2)),
    ("Lower rank, short duration", x_mid + (x_mid / 2), y_mid - (y_mid / 2)),
]
for label_text, label_x, label_y in quadrant_labels:
    fig.add_annotation(text=label_text, x=label_x, y=label_y, showarrow=False)

# The y axis plots 'Time on Chart' from the first #1 row kept for each song, i.e. how
# many weeks the song had been on the chart when it first reached #1. It is not the
# number of weeks spent at #1, so the title says what is actually plotted.
fig.update_layout(
    xaxis_title="Average Chart Position",
    yaxis_title="Weeks on Chart When Song First Reached #1"
)

fig.show()